In [12]:
!pip install seaborn
!pip install plotly
!pip install numpy
Requirement already satisfied: seaborn in c:\users\abane\music\da_prject internship\env\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from seaborn) (2.3.3)
Requirement already satisfied: pandas>=1.2 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from seaborn) (2.3.3)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from seaborn) (3.10.6)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.3)
Requirement already satisfied: cycler>=0.10 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.60.1)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.9)
Requirement already satisfied: packaging>=20.0 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)
Requirement already satisfied: pillow>=8 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.5)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: six>=1.5 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)
[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: plotly in c:\users\abane\music\da_prject internship\env\lib\site-packages (6.3.1)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from plotly) (2.6.0)
Requirement already satisfied: packaging in c:\users\abane\music\da_prject internship\env\lib\site-packages (from plotly) (25.0)
[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: numpy in c:\users\abane\music\da_prject internship\env\lib\site-packages (2.3.3)
[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
In [9]:
!pip install matplotlib
Collecting matplotlib
  Downloading matplotlib-3.10.6-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp311-cp311-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.60.1-cp311-cp311-win_amd64.whl.metadata (114 kB)
     ---------------------------------------- 0.0/114.6 kB ? eta -:--:--
     --- ------------------------------------ 10.2/114.6 kB ? eta -:--:--
     ------------------- ----------------- 61.4/114.6 kB 825.8 kB/s eta 0:00:01
     -------------------------------------- 114.6/114.6 kB 1.1 MB/s eta 0:00:00
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
Requirement already satisfied: numpy>=1.23 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib) (2.3.3)
Requirement already satisfied: packaging>=20.0 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib) (25.0)
Collecting pillow>=8 (from matplotlib)
  Using cached pillow-11.3.0-cp311-cp311-win_amd64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from matplotlib) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)
Downloading matplotlib-3.10.6-cp311-cp311-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
    --------------------------------------- 0.2/8.1 MB 5.0 MB/s eta 0:00:02
   - -------------------------------------- 0.4/8.1 MB 4.5 MB/s eta 0:00:02
   -- ------------------------------------- 0.6/8.1 MB 4.6 MB/s eta 0:00:02
   --- ------------------------------------ 0.8/8.1 MB 4.6 MB/s eta 0:00:02
   ---- ----------------------------------- 1.0/8.1 MB 4.4 MB/s eta 0:00:02
   ----- ---------------------------------- 1.2/8.1 MB 4.4 MB/s eta 0:00:02
   ------- -------------------------------- 1.4/8.1 MB 4.6 MB/s eta 0:00:02
   -------- ------------------------------- 1.6/8.1 MB 4.6 MB/s eta 0:00:02
   --------- ------------------------------ 1.8/8.1 MB 4.5 MB/s eta 0:00:02
   ---------- ----------------------------- 2.1/8.1 MB 4.5 MB/s eta 0:00:02
   ----------- ---------------------------- 2.3/8.1 MB 4.6 MB/s eta 0:00:02
   ------------ --------------------------- 2.5/8.1 MB 4.6 MB/s eta 0:00:02
   ------------- -------------------------- 2.8/8.1 MB 4.7 MB/s eta 0:00:02
   -------------- ------------------------- 3.0/8.1 MB 4.6 MB/s eta 0:00:02
   --------------- ------------------------ 3.1/8.1 MB 4.7 MB/s eta 0:00:02
   --------------- ------------------------ 3.2/8.1 MB 4.4 MB/s eta 0:00:02
   ---------------- ----------------------- 3.4/8.1 MB 4.3 MB/s eta 0:00:02
   ----------------- ---------------------- 3.6/8.1 MB 4.3 MB/s eta 0:00:02
   ------------------- -------------------- 3.9/8.1 MB 4.5 MB/s eta 0:00:01
   -------------------- ------------------- 4.1/8.1 MB 4.5 MB/s eta 0:00:01
   --------------------- ------------------ 4.4/8.1 MB 4.5 MB/s eta 0:00:01
   ---------------------- ----------------- 4.6/8.1 MB 4.5 MB/s eta 0:00:01
   ---------------------- ----------------- 4.6/8.1 MB 4.5 MB/s eta 0:00:01
   ------------------------- -------------- 5.2/8.1 MB 4.7 MB/s eta 0:00:01
   -------------------------- ------------- 5.4/8.1 MB 4.6 MB/s eta 0:00:01
   --------------------------- ------------ 5.6/8.1 MB 4.6 MB/s eta 0:00:01
   ---------------------------- ----------- 5.9/8.1 MB 4.7 MB/s eta 0:00:01
   ----------------------------- ---------- 6.1/8.1 MB 4.7 MB/s eta 0:00:01
   ------------------------------- -------- 6.3/8.1 MB 4.8 MB/s eta 0:00:01
   -------------------------------- ------- 6.6/8.1 MB 4.7 MB/s eta 0:00:01
   -------------------------------- ------- 6.6/8.1 MB 4.7 MB/s eta 0:00:01
   ---------------------------------- ----- 7.0/8.1 MB 4.7 MB/s eta 0:00:01
   ----------------------------------- ---- 7.2/8.1 MB 4.7 MB/s eta 0:00:01
   ------------------------------------ --- 7.3/8.1 MB 4.6 MB/s eta 0:00:01
   ------------------------------------ --- 7.5/8.1 MB 4.7 MB/s eta 0:00:01
   -------------------------------------- - 7.8/8.1 MB 4.7 MB/s eta 0:00:01
   ---------------------------------------  8.1/8.1 MB 4.8 MB/s eta 0:00:01
   ---------------------------------------- 8.1/8.1 MB 4.7 MB/s eta 0:00:00
Downloading contourpy-1.3.3-cp311-cp311-win_amd64.whl (225 kB)
   ---------------------------------------- 0.0/225.2 kB ? eta -:--:--
   --------------------------------------- 225.2/225.2 kB 14.3 MB/s eta 0:00:00
Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)
Downloading fonttools-4.60.1-cp311-cp311-win_amd64.whl (2.3 MB)
   ---------------------------------------- 0.0/2.3 MB ? eta -:--:--
   - -------------------------------------- 0.1/2.3 MB 2.6 MB/s eta 0:00:01
   ------- -------------------------------- 0.5/2.3 MB 4.7 MB/s eta 0:00:01
   ------------ --------------------------- 0.7/2.3 MB 5.5 MB/s eta 0:00:01
   --------------- ------------------------ 0.9/2.3 MB 5.1 MB/s eta 0:00:01
   ------------------ --------------------- 1.0/2.3 MB 4.7 MB/s eta 0:00:01
   ------------------ --------------------- 1.0/2.3 MB 4.7 MB/s eta 0:00:01
   ------------------------- -------------- 1.5/2.3 MB 4.7 MB/s eta 0:00:01
   ----------------------------- ---------- 1.7/2.3 MB 4.7 MB/s eta 0:00:01
   ---------------------------------- ----- 1.9/2.3 MB 4.8 MB/s eta 0:00:01
   -------------------------------------- - 2.2/2.3 MB 4.9 MB/s eta 0:00:01
   ---------------------------------------  2.3/2.3 MB 4.7 MB/s eta 0:00:01
   ---------------------------------------- 2.3/2.3 MB 4.4 MB/s eta 0:00:00
Downloading kiwisolver-1.4.9-cp311-cp311-win_amd64.whl (73 kB)
   ---------------------------------------- 0.0/73.8 kB ? eta -:--:--
   -------------------------------------- - 71.7/73.8 kB ? eta -:--:--
   ---------------------------------------- 73.8/73.8 kB 1.4 MB/s eta 0:00:00
Using cached pillow-11.3.0-cp311-cp311-win_amd64.whl (7.0 MB)
Downloading pyparsing-3.2.5-py3-none-any.whl (113 kB)
   ---------------------------------------- 0.0/113.9 kB ? eta -:--:--
   ---------------------------------------- 113.9/113.9 kB 3.3 MB/s eta 0:00:00
Installing collected packages: pyparsing, pillow, kiwisolver, fonttools, cycler, contourpy, matplotlib
Successfully installed contourpy-1.3.3 cycler-0.12.1 fonttools-4.60.1 kiwisolver-1.4.9 matplotlib-3.10.6 pillow-11.3.0 pyparsing-3.2.5
[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
In [1]:
!pip install pandas
Collecting pandas
  Downloading pandas-2.3.3-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas)
  Using cached numpy-2.3.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from pandas) (2.9.0.post0)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Requirement already satisfied: six>=1.5 in c:\users\abane\music\da_prject internship\env\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Downloading pandas-2.3.3-cp311-cp311-win_amd64.whl (11.3 MB)
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB 325.1 kB/s eta 0:00:35
   ---------------------------------------- 0.1/11.3 MB 465.5 kB/s eta 0:00:25
    --------------------------------------- 0.1/11.3 MB 847.9 kB/s eta 0:00:14
    --------------------------------------- 0.2/11.3 MB 1.0 MB/s eta 0:00:11
   - -------------------------------------- 0.3/11.3 MB 1.2 MB/s eta 0:00:10
   - -------------------------------------- 0.4/11.3 MB 1.3 MB/s eta 0:00:09
   - -------------------------------------- 0.5/11.3 MB 1.4 MB/s eta 0:00:08
   -- ------------------------------------- 0.7/11.3 MB 1.5 MB/s eta 0:00:08
   -- ------------------------------------- 0.8/11.3 MB 1.6 MB/s eta 0:00:07
   --- ------------------------------------ 0.9/11.3 MB 1.7 MB/s eta 0:00:07
   --- ------------------------------------ 1.1/11.3 MB 1.8 MB/s eta 0:00:06
   ---- ----------------------------------- 1.2/11.3 MB 1.9 MB/s eta 0:00:06
   ---- ----------------------------------- 1.3/11.3 MB 1.9 MB/s eta 0:00:06
   ----- ---------------------------------- 1.4/11.3 MB 1.9 MB/s eta 0:00:06
   ----- ---------------------------------- 1.5/11.3 MB 2.0 MB/s eta 0:00:05
   ----- ---------------------------------- 1.6/11.3 MB 2.0 MB/s eta 0:00:05
   ------ --------------------------------- 1.8/11.3 MB 2.1 MB/s eta 0:00:05
   ------ --------------------------------- 2.0/11.3 MB 2.1 MB/s eta 0:00:05
   ------- -------------------------------- 2.1/11.3 MB 2.1 MB/s eta 0:00:05
   -------- ------------------------------- 2.3/11.3 MB 2.3 MB/s eta 0:00:04
   --------- ------------------------------ 2.6/11.3 MB 2.4 MB/s eta 0:00:04
   --------- ------------------------------ 2.8/11.3 MB 2.5 MB/s eta 0:00:04
   ----------- ---------------------------- 3.2/11.3 MB 2.6 MB/s eta 0:00:04
   ----------- ---------------------------- 3.2/11.3 MB 2.6 MB/s eta 0:00:04
   ----------- ---------------------------- 3.3/11.3 MB 2.5 MB/s eta 0:00:04
   ------------ --------------------------- 3.6/11.3 MB 2.6 MB/s eta 0:00:03
   ------------ --------------------------- 3.6/11.3 MB 2.6 MB/s eta 0:00:03
   ------------- -------------------------- 3.8/11.3 MB 2.6 MB/s eta 0:00:03
   -------------- ------------------------- 4.1/11.3 MB 2.7 MB/s eta 0:00:03
   --------------- ------------------------ 4.5/11.3 MB 2.8 MB/s eta 0:00:03
   ---------------- ----------------------- 4.7/11.3 MB 2.8 MB/s eta 0:00:03
   ---------------- ----------------------- 4.8/11.3 MB 2.8 MB/s eta 0:00:03
   ------------------ --------------------- 5.1/11.3 MB 2.9 MB/s eta 0:00:03
   ------------------- -------------------- 5.5/11.3 MB 3.1 MB/s eta 0:00:02
   ------------------- -------------------- 5.7/11.3 MB 3.1 MB/s eta 0:00:02
   -------------------- ------------------- 5.9/11.3 MB 3.1 MB/s eta 0:00:02
   ---------------------- ----------------- 6.3/11.3 MB 3.2 MB/s eta 0:00:02
   ---------------------- ----------------- 6.5/11.3 MB 3.2 MB/s eta 0:00:02
   ----------------------- ---------------- 6.8/11.3 MB 3.3 MB/s eta 0:00:02
   ------------------------- -------------- 7.2/11.3 MB 3.4 MB/s eta 0:00:02
   -------------------------- ------------- 7.5/11.3 MB 3.5 MB/s eta 0:00:02
   --------------------------- ------------ 7.8/11.3 MB 3.5 MB/s eta 0:00:02
   ---------------------------- ----------- 8.2/11.3 MB 3.6 MB/s eta 0:00:01
   ------------------------------ --------- 8.5/11.3 MB 3.7 MB/s eta 0:00:01
   ------------------------------ --------- 8.7/11.3 MB 3.7 MB/s eta 0:00:01
   ------------------------------- -------- 9.1/11.3 MB 3.8 MB/s eta 0:00:01
   --------------------------------- ------ 9.4/11.3 MB 3.8 MB/s eta 0:00:01
   ---------------------------------- ----- 9.9/11.3 MB 3.9 MB/s eta 0:00:01
   ------------------------------------ --- 10.2/11.3 MB 3.9 MB/s eta 0:00:01
   ------------------------------------- -- 10.7/11.3 MB 4.4 MB/s eta 0:00:01
   ---------------------------------------  11.1/11.3 MB 4.6 MB/s eta 0:00:01
   ---------------------------------------  11.3/11.3 MB 4.7 MB/s eta 0:00:01
   ---------------------------------------- 11.3/11.3 MB 4.7 MB/s eta 0:00:00
Using cached numpy-2.3.3-cp311-cp311-win_amd64.whl (13.1 MB)
Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully installed numpy-2.3.3 pandas-2.3.3 pytz-2025.2 tzdata-2025.2
[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
In [3]:
!pip install openpyxl
Collecting openpyxl
  Using cached openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Using cached et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Using cached openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Using cached et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
In [4]:
import pandas as pd

# Cell values that stand for "no value" in the workbook. Matching is exact,
# i.e. case- and whitespace-sensitive.
# NOTE(review): differently-cased spellings such as 'Not Mentioned' are not in
# this list; later cells in this notebook still filter them by hand. Confirm
# whether that is intended before widening the list, because the dropna() below
# removes every row that contains any missing value.
missing_markers = ['NA', 'N/A', 'None', '', 'not mentioned', 'not available', 'unknown', 'absent']

# Load -> de-duplicate -> turn the markers into pd.NA -> drop rows with any gap.
# (Forward-filling the gaps would be the alternative to dropping the rows.)
df = (
    pd.read_excel('clinical_trial_patient_dataset.xlsx')
    .drop_duplicates()
    .replace(missing_markers, pd.NA)
    .dropna()
)

# Normalise the headers: lower-case, surrounding whitespace removed.
df.columns = [header.lower().strip() for header in df.columns]
In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of df.
df.describe()
Out[6]:
age bmi systolic_bp diastolic_bp hemoglobin_g_dl creatinine_mg_dl glucose_mg_dl visit_completion_rate missed_visits medication_adherence data_quality_score
count 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000
mean 54.276444 25.139889 124.275333 76.570444 12.401356 1.037151 100.129778 0.549178 1.348000 0.588802 79.869889
std 14.632067 7.958235 32.534530 21.703229 4.156810 0.384391 25.037816 0.385300 1.458703 0.408459 11.953774
min 18.000000 0.000000 0.000000 0.000000 0.000000 0.000000 9.000000 0.000000 0.000000 0.000000 19.300000
25% 44.000000 21.500000 114.000000 68.000000 11.700000 0.850000 83.000000 0.000000 0.000000 0.000000 72.500000
50% 54.000000 25.700000 129.000000 79.000000 13.200000 1.080000 101.000000 0.740000 1.000000 0.810000 81.900000
75% 64.000000 29.900000 143.000000 90.000000 14.700000 1.290000 117.000000 0.860000 2.000000 0.910000 89.100000
max 85.000000 54.700000 199.000000 137.000000 21.900000 2.210000 198.000000 1.000000 9.000000 1.000000 99.900000
In [7]:
# Persist the cleaned frame as CSV without the row index; later cells re-read this file.
df.to_csv('cleaned_data_from_clinical_set.csv', index=False)
In [13]:
import matplotlib.pyplot as plt

# NOTE(review): the counts below are hard-coded sample numbers, not values
# derived from the dataset. Replace them with real figures before relying on
# this chart.
stages = ['Screened', 'Enrolled', 'Randomized']
counts = [1000, 700, 600]  # replace with your data

# One vertical bar per recruitment stage.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(stages, counts, color='skyblue')
ax.set_title('Recruitment Funnel')
ax.set_xlabel('Stages')
ax.set_ylabel('Number of Patients')
plt.show()
No description has been provided for this image
In [16]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Blank out the "Not Mentioned" placeholder before parsing. The comparison is
# done on a trimmed, case-folded string copy so that every spelling of the
# placeholder (with or without the trailing space) is masked, not only the
# exact literal 'Not Mentioned '. Re-running the cell is harmless: once the
# column holds datetimes nothing matches and the parse below is a no-op.
raw_dates = df['enrollment_date']
is_placeholder = raw_dates.astype(str).str.strip().str.casefold() == 'not mentioned'

# errors='coerce' turns anything else that cannot be parsed into NaT instead of raising.
df['enrollment_date'] = pd.to_datetime(raw_dates.mask(is_placeholder, pd.NaT), errors='coerce')

# Number of rows per enrollment date. groupby leaves out NaT keys by default,
# so rows without a usable date do not appear in the trend.
df_enroll = df.groupby('enrollment_date').size().reset_index(name='count')

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df_enroll, x='enrollment_date', y='count', ax=ax)
ax.set_title('Enrollment Trend Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Enrollments')
plt.show()
No description has been provided for this image
In [17]:
# Per-site enrollment counts taken from the cleaned frame rather than
# hard-coded sample numbers ('Site A/B/C').
# NOTE(review): assumes each row of df is one enrolled patient — confirm
# against the dataset definition.
# Sorted ascending because barh draws the first entry at the bottom, which
# puts the site with the most enrollments at the top of the chart.
site_counts = df['site_id'].value_counts().sort_values()
sites = site_counts.index.astype(str).tolist()
enrollments = site_counts.tolist()

fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(sites, enrollments, color='lightgreen')
ax.set_title('Site Performance Leaderboard')
ax.set_xlabel('Number of Enrollments')
ax.set_ylabel('Sites')
plt.show()
No description has been provided for this image
In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Re-read the cleaned dataset written earlier in this notebook.
df = pd.read_csv('cleaned_data_from_clinical_set.csv')

# Mean visit completion rate for every (site, gender) pair, reshaped so that
# sites become rows and genders become columns.
mean_rate_by_pair = df.groupby(['site_id', 'gender'])['visit_completion_rate'].mean()
df_grouped = mean_rate_by_pair.unstack()

# Annotated heatmap: each cell shows the mean rate with two decimals.
fig, ax = plt.subplots(figsize=(12, 7))
sns.heatmap(
    df_grouped,
    annot=True,
    fmt=".2f",
    cmap='YlGnBu',
    cbar_kws={'label': 'Avg Visit Completion Rate'},
    ax=ax,
)
ax.set_title('Average Visit Completion Rate by Site and Gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Site ID')
plt.show()
No description has been provided for this image
In [39]:
import pandas as pd
import plotly.express as px

# Re-read the cleaned dataset written earlier in this notebook.
df = pd.read_csv('cleaned_data_from_clinical_set.csv')

# Exclude the "Not Mentioned" placeholder. The comparison runs on a trimmed,
# case-folded copy so that spellings such as 'Not Mentioned ' (trailing space,
# as handled for enrollment_date) are excluded too; the original values are
# kept untouched for the chart labels.
reason_key = df['dropout_reason'].astype(str).str.strip().str.casefold()
filtered_df = df[reason_key != 'not mentioned']

# One row per remaining reason with its frequency. rename_axis plus
# reset_index(name=...) produces the ('reason', 'count') columns regardless of
# how value_counts names its result in the installed pandas version.
dropout_counts = (
    filtered_df['dropout_reason']
    .value_counts()
    .rename_axis('reason')
    .reset_index(name='count')
)

fig = px.pie(dropout_counts, values='count', names='reason', title='Dropout Reasons (Excluding Not Mentioned)')
fig.show(renderer="notebook")
In [41]:
import pandas as pd
import plotly.express as px

# Re-read the cleaned dataset written earlier in this notebook.
df = pd.read_csv('cleaned_data_from_clinical_set.csv')

# 30-bin histogram of the 'age' column, shown with plotly's "notebook" renderer.
fig = px.histogram(
    data_frame=df,
    x='age',
    nbins=30,
    title='Age Distribution',
)
fig.show(renderer="notebook")